/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can found in LICENSE.TXT       *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2003 - 2004 by Myricom, Inc.  All rights reserved.          *
 *************************************************************************/

/* Revision-control identification string compiled into this file; the
 * leading "@(#)" is the marker that what(1) scans for.  Nothing in the
 * visible code references it. */
static const char __idstring[] = "@(#)$Id: packet.c,v 1.36 2006/03/31 19:34:36 karen Exp $";

/*
 *  Routines related to sending and receiving packets
 */

#include <sys/types.h>
#include <sys/socket.h>

#include <netinet/in.h>

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "mx_auto_config.h"
#include "myriexpress.h"
#include "internal.h"
#include "packet.h"
#include "tcp.h"


/*
 * Translate a wire packet type into the corresponding MX_SR_TYPE_* value.
 *
 * MX_PKT_TYPE_CONN and any value not listed below have no counterpart and
 * yield MX_SR_TYPE_SEND as a placeholder.
 */
static inline mx_post_type_t
mx_pkt_to_sr(mx_pkt_type_t pkt_type){
  /* placeholder result, also the genuine answer for MX_PKT_TYPE_SEND */
  mx_post_type_t sr_type = MX_SR_TYPE_SEND;

  switch (pkt_type){
  case MX_PKT_TYPE_ISSEND:
    sr_type = MX_SR_TYPE_ISSEND;
    break;
  case MX_PKT_TYPE_ISSEND_ACK:
    sr_type = MX_SR_TYPE_ISSEND_ACK;
    break;
  case MX_PKT_TYPE_BARRIER:
    sr_type = MX_SR_TYPE_BARRIER;
    break;
  case MX_PKT_TYPE_BARRIER_ACK:
    sr_type = MX_SR_TYPE_BARRIER_ACK;
    break;
  case MX_PKT_TYPE_BCAST:
    sr_type = MX_SR_TYPE_BCAST;
    break;
  case MX_PKT_TYPE_PUT:
    sr_type = MX_SR_TYPE_PUT;
    break;
  case MX_PKT_TYPE_PUT_ACK:
    sr_type = MX_SR_TYPE_PUT_ACK;
    break;
  case MX_PKT_TYPE_GET:
    sr_type = MX_SR_TYPE_GET;
    break;
  case MX_PKT_TYPE_GET_DATA:
    sr_type = MX_SR_TYPE_GET_DATA;
    break;
  case MX_PKT_TYPE_SEND:
  case MX_PKT_TYPE_CONN:
  default:
    /* keep the placeholder */
    break;
  }

  return sr_type;
}


/*
 * Helper routine to turn an IP address into a dotted-decimal string.
 *
 * The four bytes of ipaddr are printed in the order they sit in memory,
 * so the caller is expected to pass the address in network byte order.
 *
 * Returns a pointer to a static buffer: the result is overwritten by the
 * next call and the routine is not reentrant.
 */
char *
ipaddr2str(uint32_t ipaddr)
{
  static char str[80];
  const unsigned char *p;

  /* Look at the octets as unsigned bytes; going through plain char would
   * print values >= 128 as negative numbers where char is signed.
   */
  p = (const unsigned char *)(&ipaddr);
  snprintf(str, sizeof(str), "%d.%d.%d.%d", p[0], p[1], p[2], p[3]);
  return str;
}

/*
 * Build a packet header and write it to a socket.
 *
 *   s        - socket to write the header to
 *   type     - packet type stored in the header
 *   endpoint - local endpoint; its my_address becomes the header source
 *   length   - length value for the header, in host order (converted to
 *              network order here)
 *
 * Returns 0 if the header was written, -1 if the write failed.
 */
int
mx_send_header(int s,
               mx_pkt_type_t type,
               struct mx_endpoint *endpoint,
               uint32_t length)
{
  int n;
  struct mx_pkt_hdr hdr;
    
  switch (type) {
  case MX_PKT_TYPE_CONN:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending MX_PKT_TYPE_CONN\n"));
    break;
  case MX_PKT_TYPE_SEND:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending MX_PKT_TYPE_SEND\n"));
    break;
  case MX_PKT_TYPE_ISSEND:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending MX_PKT_TYPE_ISSEND\n"));
    break;
  case MX_PKT_TYPE_BARRIER:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending MX_PKT_TYPE_BARRIER\n"));
    break;
  case MX_PKT_TYPE_BCAST:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending MX_PKT_TYPE_BCAST\n"));
    break;
  case MX_PKT_TYPE_BARRIER_ACK:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending MX_PKT_TYPE_BARRIER_ACK\n"));
    break;
  case MX_PKT_TYPE_ISSEND_ACK:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending MX_PKT_TYPE_ISSEND_ACK\n"));
    break;
  case MX_PKT_TYPE_PUT:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending MX_PKT_TYPE_PUT\n"));
    break;
  case MX_PKT_TYPE_PUT_ACK:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending MX_PKT_TYPE_PUT_ACK\n"));
    break;
  case MX_PKT_TYPE_GET:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending MX_PKT_TYPE_GET\n"));
    break;
  case MX_PKT_TYPE_GET_DATA:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending MX_PKT_TYPE_GET_DATA\n"));
    break;
  default:
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("sending unknown pkt type %d\n", type));
    break;
  }
    
  /* Clear the header before filling it in so that any padding, or any
   * field not assigned below, goes out as zeros rather than as whatever
   * happened to be on the stack.
   */
  memset(&hdr, 0, sizeof(hdr));
  hdr.type = type;
  hdr.source = endpoint->my_address;
  hdr.length = htonl(length);
    
  /* send it */
  n = mx_sock_write(s, &hdr, sizeof(hdr));
    
  /* any negative return is a failure, not just exactly -1 */
  if (n < 0) {
    perror("send hdr failed");
    return -1;
  }

  return 0;
}

/*
 * Read one packet from an address's receive socket and dispatch it.
 *
 *   endpoint - local endpoint the packet is destined for
 *   ep       - descriptor of the sending address; ep->recv_sock is read
 *
 * The packet header is read first, then the send sub-header, and the
 * packet is handed to the handler for its type.  The length passed on to
 * the handlers is the header length minus the size of the sub-header.
 * If either header cannot be read, the receive socket is closed.
 */
void
mx_receive_data(struct mx_endpoint *endpoint,
                struct mx_address_desc *ep)
{
  struct mx_pkt_hdr hdr;
  int rc;
  uint32_t len;
  struct mx_send_hdr send_hdr;
  mx_sr_type_t type;

  /* data is pending on the socket from this address, read and process it */
  rc = mx_sock_read(ep->recv_sock, &hdr, sizeof(hdr));
  if (rc < 0) {
    mx_close_recv_socket(endpoint, ep);
    return;
  }
    
  /* extract stuff that's in network order */
  len = ntohl(hdr.length);

  /* receive the send sub-header */
  rc = mx_sock_read(ep->recv_sock, &send_hdr, sizeof (send_hdr));
  if (rc < 0) {
    mx_close_recv_socket(endpoint, ep);
    return;
  }

  /* The length comes straight off the wire.  If it is smaller than the
   * sub-header, the unsigned subtraction would wrap around to a huge
   * value that the handlers below would take as the payload size, so
   * treat such a packet as carrying no payload.
   */
  if (len >= sizeof(send_hdr)) {
    len -= sizeof(send_hdr);
  } else {
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,
                   ("pkt length %u shorter than send header, no payload\n",
                    (unsigned int)len));
    len = 0;
  }

  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("got recv, len = %u\n", (unsigned int)len));

  type = mx_pkt_to_sr(hdr.type);

  /* process the message based on the packet type */
  switch (hdr.type) {
  case MX_PKT_TYPE_SEND:
  case MX_PKT_TYPE_ISSEND:
  case MX_PKT_TYPE_BCAST:
  case MX_PKT_TYPE_BARRIER_ACK:
  case MX_PKT_TYPE_GET_DATA:
    {
      struct mx_post *post;
      uint32_t match_a = ntohl(send_hdr.match_a);
      uint32_t match_b = ntohl(send_hdr.match_b);
      /* match_a is the upper 32 bits of the match value, match_b the lower */
      uint64_t match_info = (uint64_t)match_a << 32 | match_b;
      mx_request_t sender_request = send_hdr.request;

      MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("it's not a barrier match=%x:%x\n", match_a,match_b));
      /* search for a matching receive for this pile of data */
      post = mx_find_recv(endpoint, match_info, type);
        
      if (post != NULL) {
        /* matching recv has already been posted */
        mx_fulfill_recv(post, ep, len, match_info, type, sender_request);
      } else {
        /* unexpected message */
        mx_queue_msg(endpoint, ep, match_info, sender_request, len, type);
      }
    }
    break;

  case MX_PKT_TYPE_ISSEND_ACK:
    {
      mx_process_issend_ack(endpoint, ep);
    }
    break;

  case MX_PKT_TYPE_BARRIER:
    {
      uint32_t match_a = ntohl(send_hdr.match_a);
      uint32_t match_b = ntohl(send_hdr.match_b);
      uint64_t match_info = (uint64_t)match_a << 32 | match_b;
      MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("it's a barrier\n"));
      mx_process_barrier(endpoint, ep, match_info, len, type);
    }
    break;
        
  case MX_PKT_TYPE_PUT:
    {
      mx_request_t sender_request = send_hdr.request;
      mx_process_put(endpoint, ep, len, send_hdr.rdma_handle,
                     send_hdr.rdma_offset, sender_request);
    }
    break;
        
  case MX_PKT_TYPE_PUT_ACK:
    {
      mx_process_put_ack(endpoint, ep);
    }
    break;

  case MX_PKT_TYPE_GET:
    {
      mx_request_t sender_request = send_hdr.request;
      mx_process_get(endpoint, ep, sender_request);
    }
    break;

  default:
    {
      /* unrecognised type: report it and discard the rest of the packet */
      struct mx_lib_address *ap;
      ap = (struct mx_lib_address *) &ep->address;
            
      fprintf(stderr, "Unknown pkt type(%d) from 0x%x:%d, len = %u\n",
              hdr.type,
              (unsigned int)ntohl(ap->ipaddr_n),
              ntohs(ap->ipport_n),
              (unsigned int)len);
      mx_trash_packet(ep->recv_sock, len);
    }
    break;
  }
    
}

/*
 * Close an address's receive socket and remove it from the select list.
 *
 *   endpoint - endpoint whose socket list holds the descriptor
 *   ep       - address descriptor owning the receive socket
 *
 * Does nothing when the address has no receive socket (recv_sock is -1),
 * so a repeated call cannot close or deregister an invalid descriptor.
 * On return ep->recv_sock is -1.
 */
void
mx_close_recv_socket(struct mx_endpoint *endpoint,
                     struct mx_address_desc *ep)
{
  int sock = ep->recv_sock;

  if (sock == -1) {
    return;
  }

  /* deregister before closing, the same order mx_establish_connection
   * uses when it replaces a duplicate connection
   */
  mx_endpoint_sock_remove(endpoint, sock);
  close(sock);
  ep->recv_sock = -1;
}

/*
 * Drain and discard len bytes from socket s.
 *
 * The data is pulled through a fixed scratch buffer one chunk at a time.
 * Draining stops early, without reporting anything to the caller, as soon
 * as a read does not return a positive count.
 */
void
mx_trash_packet(int s,
                uint32_t len)
{
  char scratch[1024];
  int got;

  for (; len > 0; len -= got) {

    /* ask for whatever is left, capped at the scratch buffer size */
    if (len > sizeof (scratch)) {
      got = sizeof (scratch);
    } else {
      got = len;
    }

    got = mx_sock_read(s, scratch, got);
    if (got <= 0) {
      break;
    }
  }
}

/*
 * Accept a pending connection on the endpoint's socket and register it
 * as the receive socket of the connecting address.
 *
 * The peer is expected to send a MX_PKT_TYPE_CONN header carrying its
 * address right after connecting.  If that header cannot be read, has a
 * different type, or no address descriptor can be obtained, the new
 * socket is closed and nothing is registered.  An existing receive
 * socket for the same address is replaced by the new one.
 *
 * A failing accept() currently terminates the process.
 */
void
mx_establish_connection(struct mx_endpoint *endpoint)
{
  struct sockaddr_in saddr;
  socklen_t saddr_len;
  struct mx_pkt_hdr hdr;
  struct mx_address_desc *ep;
  uint32_t src_word;
  int s;
  int rc;
    
  saddr_len = sizeof(saddr);
  s = accept(endpoint->socket, (struct sockaddr *)&saddr, &saddr_len);
  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("accept(%d) returns %d\n", endpoint->socket, s));
    
  if (s == -1) {
    perror("accept");
    exit(1);		/* XXX for now */
  }

  /* accept successful - remote address should now send us a connect
   * message containing his address information.
   */
  rc = mx_sock_read(s, &hdr, sizeof(hdr));
  if (rc < 0) {
    /* no header arrived, so hdr holds nothing we may look at */
    close(s);
    return;
  }
        
  /* make sure this is a CONN packet */
  if (hdr.type != MX_PKT_TYPE_CONN) {
    fprintf(stderr, "Unexpected packet type (%d) at connection\n", hdr.type);
    close(s);
    return;
  }
        
  /* find address structure, creating it if necessary */
  ep = mx_get_address_desc(endpoint, hdr.source);
  if (ep == NULL) {
    close(s);
    return;
  }

  /* first 32 bits of the source address, used for messages only; copied
   * out with memcpy rather than read through a cast pointer
   */
  memcpy(&src_word, &hdr.source, sizeof(src_word));
        
  /* If this address already has an associated socket, replace it
   * with this one
   */
  if (ep->recv_sock != -1) {
    fprintf(stderr, "duplicate connection from ep 0x%x\n", src_word);
    mx_endpoint_sock_remove(endpoint, ep->recv_sock);
    close(ep->recv_sock);
  }
        
  ep->recv_sock = s;   /* fill in value of receive socket */
  mx_endpoint_sock_insert(endpoint, s);
        
  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("accepted connection from %s\n", 
            ipaddr2str(src_word)));
}

/*
 * Return the send socket for a destination address, connecting first if
 * the address has none yet.
 *
 *   endpoint - local endpoint doing the sending
 *   dest     - destination address; viewed as a struct mx_lib_address to
 *              get at its IP address and port (both in network order)
 *
 * A newly opened socket is connected to the destination and a
 * MX_PKT_TYPE_CONN header is sent on it to identify this endpoint.
 *
 * Returns the connected socket, or -1 if the address descriptor, the
 * socket, the connection or the CONN header could not be set up.  After
 * a failure the descriptor's send_sock is left at -1, so a later call
 * attempts the connection again.
 */
int
get_sock_for_address(struct mx_endpoint *endpoint,
                     mx_endpoint_addr_t dest)
{
  int rc;
  struct mx_address_desc *ed;
  struct sockaddr_in saddr;
  struct mx_lib_address *dp;
    
  /* refer to dest with internal structure type */
  dp = (struct mx_lib_address *)&dest;
  MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("getting socket for %s\n", ipaddr2str(dp->ipaddr_n)));
  /*
   * Get entry for this address
   */
  ed = mx_get_address_desc(endpoint, dest);
  if (ed == NULL) {
    return -1;		/* can't get one for some reason */
  }
    
  /* If no socket for this address, open one and connect it */
  if (ed->send_sock == -1) {
    ed->send_sock = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
    if (ed->send_sock == -1) {
            
      perror("socket"); /* XXX */
      return -1;
    }
        
    /* XXX setsockopt, SO_LINGER, etc... */
        
    /* build socket address for destination; start from all zeros so no
     * member of saddr is handed to connect() uninitialized
     */
    memset(&saddr, 0, sizeof(saddr));
    saddr.sin_family = AF_INET;
    saddr.sin_port = dp->ipport_n;
    memcpy(&saddr.sin_addr.s_addr, &dp->ipaddr_n, sizeof(dp->ipaddr_n));
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("ipaddr = %s, port = %d\n", ipaddr2str(dp->ipaddr_n), 
              ntohs(dp->ipport_n)));
        
    /* connect to the remote socket */
    rc = connect(ed->send_sock, (struct sockaddr *)&saddr, sizeof(saddr));
    if (rc == -1) {
      perror("connect");	/* XXX */
      close(ed->send_sock);
      ed->send_sock = -1;
      /* report the failure like every other error path here instead of
       * terminating the whole process
       */
      return -1;
    }
        
    MX_DEBUG_PRINT(MX_DEBUG_TCP_LIB,("connected\n"));
        
    /* first thing we do after connecting is to send a CONN packet to
     * identify ourselves.
     */
    rc = mx_send_header(ed->send_sock, MX_PKT_TYPE_CONN, endpoint, 0);
    if (rc == -1) {
      perror("write MX_PKT_TYPE_CONN");
      close(ed->send_sock);
      ed->send_sock = -1;
      return -1;
    }
  }
    
  /* socket opened and connected, ready to use */
  return ed->send_sock;
}

/*
 * Read exactly len bytes from s into buf.
 *
 * read() is called repeatedly until the buffer is full; a read that is
 * interrupted by a signal is simply retried.
 *
 * Returns len on success.  Returns -1, after printing a diagnostic, if
 * read() fails or if end-of-file is reached before len bytes arrived.
 */
int
mx_sock_read(int s,
             void *buf,
             uint32_t len)
{
  char *dst = buf;       /* byte pointer: arithmetic on void * is not standard C */
  uint32_t nleft = len;

  while (nleft > 0) {
    ssize_t n = read(s, dst, nleft);

    if (n < 0) {
      if (errno == EINTR) {
        continue;        /* interrupted before any data arrived, try again */
      }
      perror("read in mx_sock_read");
      return -1;
    }

    if (n == 0) {
      /* end-of-file: errno is not set here, so perror would mislead */
      fprintf(stderr, "read in mx_sock_read: connection closed by peer\n");
      return -1;
    }

    /* indicate we read some stuff */
    dst += n;
    nleft -= (uint32_t)n;
  }

  return (int)len;
}

/*
 * Write exactly len bytes from buf to s.
 *
 * write() is called repeatedly until everything has been sent; a write
 * that is interrupted by a signal is retried.
 *
 * Returns len once all bytes are written.  Any other write error is
 * reported with perror() and the process is aborted, so an error is
 * never returned to the caller.
 */
int
mx_sock_write(int s, void *buf, uint32_t len)
{
  const char *src = buf; /* byte pointer: arithmetic on void * is not standard C */
  uint32_t n = 0;        /* bytes written so far */

  while (n < len) {
    ssize_t c = write(s, src + n, len - n);

    if (c < 0) {
      if (errno == EINTR) {
        continue;        /* interrupted before anything was written, try again */
      }
      perror("write in mx_sock_write");
      abort();
    }
    n += (uint32_t)c;
  }

  return (int)len;
}
